---
title: "results plots 2303version"
author: "Yanran"
date: "2023/02/28"
output: html_document
---

library imported
```{r libraries message=FALSE, warning=FALSE}
library(dplyr)
library(reshape2)
library(ggplot2)
library(cowplot)
library(rstudioapi)
library(scales)
library(xtable)
library(tidycensus)
library(sf)
library(tigris)
library(plyr)
library(tidyr)
library(RColorBrewer)
```

# Figure 1 (new!)
## Maps of CT percent Black in MA and GA (top row), Boston and Atlanta (bottom row). 
Note that Atlanta is Fulton County GA, county FIPS code is 13121.
-Use a common (discrete) scale in all maps to illustrate the difference in % Black in MA and GA

```{r fig1 message=FALSE, warning=FALSE}


```



# Figure 3: Coefficient bias boxplots for MA and GA
Changes to the current figure:
-Add a panel below with the results for GA
-Use the “Set1” color palette in RColorBrewer
-Remove the beta0 plot (I don’t think we need to show the intercept)
-Are we plotting percents for both beta1 and beta2? It seems strange that there’s so much larger percent bias in beta1 compared to beta2.
-Instead of the labels beta1 and beta2, use "Black vs NHW" and "Poverty"
-Remove the x-axis labels (the beta1_bias and beta2_bias)
### boxplot of 3 coefficients $\beta_0$, $\beta_1$, $\beta_2$

```{r message=FALSE, warning=FALSE}
## set working directory to wherever this file is located ##
current_path <- getActiveDocumentContext()$path
setwd(dirname(current_path ))


load("../results_manu/coef10.RData")
ce_sim <- cars
load("../results_manu/coef2.RData")
dp_sim <- cars
load("../results_manu/coef3.RData")
dp0527_sim <- cars
# load("../results_manu/coef4.RData")
# dp22_sim <- cars
load("../results_manu/coef5.RData")
dp0825_sim <- cars

load("../results_manu/GA/coef1.RData")
ga_ce_sim <- cars
load("../results_manu/GA/coef2.RData")
ga_dp_sim <- cars
load("../results_manu/GA/coef3.RData")
ga_dp0527_sim <- cars
load("../results_manu/GA/coef5.RData")
ga_dp0825_sim <- cars


load("../GA/sim/bw_adat5.RData")
adat_ga <- adat
names(adat) 

load("../bw_adat5.RData")
names(adat) 


```


```{r message=FALSE, warning=FALSE}
## coef bias mape##
ce_sim <- ce_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
dp_sim <- dp_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
dp0527_sim <- dp0527_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01)
#dp22_sim <- dp22_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
dp0825_sim <- dp0825_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 

box_compare1 <- ce_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
#box_compare1 <- ce_sim[,c("beta0", "beta1", "beta2")] 
box_compare1 <- box_compare1 %>% mutate(Data="DC") 
#box_compare2 <- dp_sim[,c("beta0", "beta1", "beta2")] 
box_compare2 <- dp_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
box_compare2 <- box_compare2 %>% mutate(Data="DP19")
box_compare3 <- dp0527_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
#box_compare3 <- dp0527_sim[,c("beta0", "beta1", "beta2")] 
box_compare3 <- box_compare3 %>% mutate(Data="DP20")
box_compare5 <- dp0825_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
box_compare5 <- box_compare5 %>% mutate(Data="DP22")
box_compare <- melt(rbind(box_compare1, box_compare2, box_compare3, box_compare5))

ggplot(box_compare, aes(x=variable,y=value, fill=Data)) +
    geom_boxplot()
par(mfrow=c(1,2))
box0 <- box_compare[box_compare$variable=="beta0_bias",]
b0<- ggplot(box0, aes(x=variable,y=value, fill=Data)) + xlab(NULL)+ ylab(NULL)+scale_y_continuous(labels = percent_format())+ggtitle("") +
    geom_boxplot()+
  scale_fill_brewer(palette = "Set1")
box1 <- box_compare[box_compare$variable=="beta1_bias",]
b1<- ggplot(box1, aes(x=variable,y=value, fill=Data)) + xlab(NULL)+ ylab("MA")+scale_y_continuous(labels = percent_format())+ggtitle("") +
  #ggtitle("Black vs NHW") +
    geom_boxplot()+theme_linedraw()+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")+ coord_cartesian(ylim=c(-0.15, 0.15))
box2 <- box_compare[box_compare$variable=="beta2_bias",]
b2<- ggplot(box2, aes(x=variable,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+scale_y_continuous(labels = percent_format())+ggtitle("")+
  #ggtitle("Poverty")+
    geom_boxplot()+theme_linedraw()+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")+ coord_cartesian(ylim=c(-0.006, 0.006))


############
### GA #####
############

## coef bias ##
ga_ce_sim <- ga_ce_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
ga_dp_sim <- ga_dp_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
ga_dp0527_sim <- ga_dp0527_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01)
ga_dp0825_sim <- ga_dp0825_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 

ga_box_compare1 <- ga_ce_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
ga_box_compare1 <- ga_box_compare1 %>% mutate(Data="DC") 
ga_box_compare2 <- ga_dp_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
ga_box_compare2 <- ga_box_compare2 %>% mutate(Data="DP19")
ga_box_compare3 <- ga_dp0527_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
ga_box_compare3 <- ga_box_compare3 %>% mutate(Data="DP20")
ga_box_compare5 <- ga_dp0825_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
ga_box_compare5 <- ga_box_compare5 %>% mutate(Data="DP22")
ga_box_compare <- melt(rbind(ga_box_compare1, ga_box_compare2, ga_box_compare3, ga_box_compare5))

ggplot(ga_box_compare, aes(x=variable,y=value, fill=Data)) +
    geom_boxplot()
par(mfrow=c(1,2))
ga_box1 <- ga_box_compare[ga_box_compare$variable=="beta1_bias",]
b3<- ggplot(ga_box1, aes(x=variable,y=value, fill=Data)) + xlab(NULL)+ ylab("GA")+scale_y_continuous(labels = percent_format())+ggtitle("") +geom_boxplot()+theme_linedraw()+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")+ coord_cartesian(ylim=c(-0.15, 0.15))
ga_box2 <- ga_box_compare[ga_box_compare$variable=="beta2_bias",]
b4<- ggplot(ga_box2, aes(x=variable,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+scale_y_continuous(labels = percent_format())+ggtitle("")+geom_boxplot()+theme_linedraw()+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")+ coord_cartesian(ylim=c(-0.006, 0.006))

require(cowplot)
library(grid)
library(gridExtra)

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  b1+ theme(legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
            panel.grid.minor = element_line(colour = 'transparent'),
            legend.box.background = element_rect(fill = "white", colour = NA))+
  scale_fill_brewer(palette = "Set1"))

# prow <- plot_grid(
#   b1+ theme(#plot.title =element_text(size=12,hjust= 0.5),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         panel.grid.major = element_line(colour = 'transparent'),
#         panel.grid.minor = element_line(colour = 'transparent'),
#         panel.background = element_rect(fill = "white", colour = NA),
#         legend.position = "none")+
#   scale_fill_brewer(palette = "Set1"),
#       b3+ theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         axis.title.y=element_blank(),
#         panel.grid.major = element_line(colour = 'transparent'),
#         panel.grid.minor = element_line(colour = 'transparent'),
#         panel.background = element_rect(fill = "white", colour = NA),
#         legend.position = "none")+
#   scale_fill_brewer(palette = "Set1"),
#   b2+ theme(#plot.title =element_text(size=12,hjust =0.5),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         panel.grid.major = element_line(colour = 'transparent'),
#         panel.grid.minor = element_line(colour = 'transparent'),
#         panel.background = element_rect(fill = "white", colour = NA),
#         legend.position = "none")+
#   scale_fill_brewer(palette = "Set1"),
#   b4+ theme(axis.title.x=element_blank(),
#         axis.text.x=element_blank(),
#         axis.ticks.x=element_blank(),
#         panel.grid.major = element_line(colour = 'transparent'),
#         panel.grid.minor = element_line(colour = 'transparent'),
#         panel.background = element_rect(fill = "white", colour = NA),
#         legend.position = "none")+
#   scale_fill_brewer(palette = "Set1"),
#  align = 'vh',nrow = 2)


box_compare <- box_compare %>% mutate(state = "MA")
ga_box_compare <- ga_box_compare %>% mutate(state = "GA")
box_compare <- rbind(box_compare, ga_box_compare)
# 对 state 变量因子化并按照需要的顺序排序
box_compare$state <- factor(box_compare$state, levels = c("MA", "GA"))
box_race <- box_compare[box_compare$variable=="beta1_bias",]
box_race <- ggplot(box_race, aes(x=variable,y=value, fill=Data)) + xlab(NULL)+ ylab(NULL)+scale_y_continuous(labels = percent_format())+ggtitle("") +  #ggtitle("Black vs NHW") +
    geom_boxplot() + theme_linedraw() + geom_hline(yintercept = 0, color = "black", linetype = "dashed")+ coord_cartesian(ylim=c(-0.15, 0.15)) + facet_wrap(~state)
box_poverty <- box_compare[box_compare$variable=="beta2_bias",]
box_poverty <- ggplot(box_poverty, aes(x=variable, y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+ scale_y_continuous(labels = percent_format()) + ggtitle("") + #ggtitle("Poverty") +
    geom_boxplot() + theme_linedraw() + geom_hline(yintercept = 0, color = "black", linetype = "dashed") + coord_cartesian(ylim=c(-0.006, 0.006)) + facet_wrap(~state)
prow_t <- plot_grid(
  box_race + theme(
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.position = "none")+
  scale_fill_brewer(palette = "Set1"),
  box_poverty + theme(
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.position = "none")+
  scale_fill_brewer(palette = "Set1"),
 align = 'vh',nrow = 2)

coef_boxplot<-plot_grid(prow_t,legend, nrow=2, rel_heights = c(2, .25))
coef_boxplot

mean_bias <- rbind(mean(ce_sim$beta0_bias), mean(ce_sim$beta1_bias), mean(ce_sim$beta2_bias))%>% cbind( rbind(mean(dp_sim$beta0_bias), mean(dp_sim$beta1_bias), mean(dp_sim$beta2_bias))) %>% cbind( rbind(mean(dp0527_sim$beta0_bias), mean(dp0527_sim$beta1_bias), mean(dp0527_sim$beta2_bias)))%>% cbind( rbind(mean(dp0825_sim$beta0_bias), mean(dp0825_sim$beta1_bias), mean(dp0825_sim$beta2_bias)))

colnames(mean_bias)<-c("DC", "DP19", "DP20", "DP22")
#rownames(mean_bias) <- c()
xtable(mean_bias,digits = 4)


ga_mean_bias <- rbind(mean(ga_ce_sim$beta0_bias), mean(ga_ce_sim$beta1_bias), mean(ga_ce_sim$beta2_bias))%>% cbind( rbind(mean(ga_dp_sim$beta0_bias), mean(ga_dp_sim$beta1_bias), mean(ga_dp_sim$beta2_bias))) %>% cbind( rbind(mean(ga_dp0527_sim$beta0_bias), mean(ga_dp0527_sim$beta1_bias), mean(ga_dp0527_sim$beta2_bias)))%>% cbind( rbind(mean(ga_dp0825_sim$beta0_bias), mean(ga_dp0825_sim$beta1_bias), mean(ga_dp0825_sim$beta2_bias)))

colnames(ga_mean_bias)<-c("DC", "DP19", "DP20", "DP0825")
#rownames(mean_bias) <- c()
xtable(ga_mean_bias,digits = 4)

```


```{r coef CI coverage}

beta0_true <- 0
beta1_true <- 0.4
beta2_true <- 0.01

# check CI coverage
ce_ci_cov0 <- ifelse(beta0_true >= ce_sim$beta0_lo & beta0_true <= ce_sim$beta0_hi, 1, 0)
ce_ci_cov1 <- ifelse(beta1_true >= ce_sim$beta1_lo & beta1_true <= ce_sim$beta1_hi, 1, 0)
ce_ci_cov2 <- ifelse(beta2_true >= ce_sim$beta2_lo & beta2_true <= ce_sim$beta2_hi, 1, 0)

dp_ci_cov0 <- ifelse(beta0_true >= dp_sim$beta0_lo & beta0_true <= dp_sim$beta0_hi, 1, 0)
dp_ci_cov1 <- ifelse(beta1_true >= dp_sim$beta1_lo & beta1_true <= dp_sim$beta1_hi, 1, 0)
dp_ci_cov2 <- ifelse(beta2_true >= dp_sim$beta2_lo & beta2_true <= dp_sim$beta2_hi, 1, 0)

dp0527_ci_cov0 <- ifelse(beta0_true >= dp0527_sim$beta0_lo & beta0_true <= dp0527_sim$beta0_hi, 1, 0)
dp0527_ci_cov1 <- ifelse(beta1_true >= dp0527_sim$beta1_lo & beta1_true <= dp0527_sim$beta1_hi, 1, 0)
dp0527_ci_cov2 <- ifelse(beta2_true >= dp0527_sim$beta2_lo & beta2_true <= dp0527_sim$beta2_hi, 1, 0)

# dp22_ci_cov0 <- ifelse(beta0_true >= dp22_sim$beta0_lo & beta0_true <= dp22_sim$beta0_hi, 1, 0)
# dp22_ci_cov1 <- ifelse(beta1_true >= dp22_sim$beta1_lo & beta1_true <= dp22_sim$beta1_hi, 1, 0)
# dp22_ci_cov2 <- ifelse(beta2_true >= dp22_sim$beta2_lo & beta2_true <= dp22_sim$beta2_hi, 1, 0)

dp0825_ci_cov0 <- ifelse(beta0_true >= dp0825_sim$beta0_lo & beta0_true <= dp0825_sim$beta0_hi, 1, 0)
dp0825_ci_cov1 <- ifelse(beta1_true >= dp0825_sim$beta1_lo & beta1_true <= dp0825_sim$beta1_hi, 1, 0)
dp0825_ci_cov2 <- ifelse(beta2_true >= dp0825_sim$beta2_lo & beta2_true <= dp0825_sim$beta2_hi, 1, 0)

# check CI coverage
ce_ci_cov <- c(mean(ce_ci_cov0), mean(ce_ci_cov1), mean(ce_ci_cov2))
dp_ci_cov <- c(mean(dp_ci_cov0), mean(dp_ci_cov1), mean(dp_ci_cov2))
dp0527_ci_cov <- c(mean(dp0527_ci_cov0), mean(dp0527_ci_cov1), mean(dp0527_ci_cov2))
# dp22_ci_cov <- c(mean(dp22_ci_cov0), mean(dp22_ci_cov1), mean(dp22_ci_cov2))
dp0825_ci_cov <- c(mean(dp0825_ci_cov0), mean(dp0825_ci_cov1), mean(dp0825_ci_cov2))

ci_cov <- cbind(ce_ci_cov, dp_ci_cov, dp0527_ci_cov, dp0825_ci_cov)
rownames(ci_cov) <- c("beta0", "beta1", "beta2")
colnames(ci_cov) <- c("DC", "DP19", "DP20", "DP22")

xtable(ci_cov*100, digits = 0)



############
### GA #####
############


# check CI coverage
ce_ci_cov0 <- ifelse(beta0_true >= ga_ce_sim$beta0_lo & beta0_true <= ga_ce_sim$beta0_hi, 1, 0)
ce_ci_cov1 <- ifelse(beta1_true >= ga_ce_sim$beta1_lo & beta1_true <= ga_ce_sim$beta1_hi, 1, 0)
ce_ci_cov2 <- ifelse(beta2_true >= ga_ce_sim$beta2_lo & beta2_true <= ga_ce_sim$beta2_hi, 1, 0)

dp_ci_cov0 <- ifelse(beta0_true >= ga_dp_sim$beta0_lo & beta0_true <= ga_dp_sim$beta0_hi, 1, 0)
dp_ci_cov1 <- ifelse(beta1_true >= ga_dp_sim$beta1_lo & beta1_true <= ga_dp_sim$beta1_hi, 1, 0)
dp_ci_cov2 <- ifelse(beta2_true >= ga_dp_sim$beta2_lo & beta2_true <= ga_dp_sim$beta2_hi, 1, 0)

dp0527_ci_cov0 <- ifelse(beta0_true >= ga_dp0527_sim$beta0_lo & beta0_true <= ga_dp0527_sim$beta0_hi, 1, 0)
dp0527_ci_cov1 <- ifelse(beta1_true >= ga_dp0527_sim$beta1_lo & beta1_true <= ga_dp0527_sim$beta1_hi, 1, 0)
dp0527_ci_cov2 <- ifelse(beta2_true >= ga_dp0527_sim$beta2_lo & beta2_true <= ga_dp0527_sim$beta2_hi, 1, 0)

# dp22_ci_cov0 <- ifelse(beta0_true >= dp22_sim$beta0_lo & beta0_true <= dp22_sim$beta0_hi, 1, 0)
# dp22_ci_cov1 <- ifelse(beta1_true >= dp22_sim$beta1_lo & beta1_true <= dp22_sim$beta1_hi, 1, 0)
# dp22_ci_cov2 <- ifelse(beta2_true >= dp22_sim$beta2_lo & beta2_true <= dp22_sim$beta2_hi, 1, 0)

dp0825_ci_cov0 <- ifelse(beta0_true >= ga_dp0825_sim$beta0_lo & beta0_true <= ga_dp0825_sim$beta0_hi, 1, 0)
dp0825_ci_cov1 <- ifelse(beta1_true >= ga_dp0825_sim$beta1_lo & beta1_true <= ga_dp0825_sim$beta1_hi, 1, 0)
dp0825_ci_cov2 <- ifelse(beta2_true >= ga_dp0825_sim$beta2_lo & beta2_true <= ga_dp0825_sim$beta2_hi, 1, 0)

# check CI coverage
ce_ci_cov <- c(mean(ce_ci_cov0), mean(ce_ci_cov1), mean(ce_ci_cov2))
dp_ci_cov <- c(mean(dp_ci_cov0), mean(dp_ci_cov1), mean(dp_ci_cov2))
dp0527_ci_cov <- c(mean(dp0527_ci_cov0), mean(dp0527_ci_cov1), mean(dp0527_ci_cov2))
# dp22_ci_cov <- c(mean(dp22_ci_cov0), mean(dp22_ci_cov1), mean(dp22_ci_cov2))
dp0825_ci_cov <- c(mean(dp0825_ci_cov0), mean(dp0825_ci_cov1), mean(dp0825_ci_cov2))

ga_ci_cov <- cbind(ce_ci_cov, dp_ci_cov, dp0527_ci_cov, dp0825_ci_cov)
rownames(ga_ci_cov) <- c("beta0", "beta1", "beta2")
colnames(ga_ci_cov) <- c("DC", "DP19", "DP20", "DP22")

xtable(ga_ci_cov*100, digits = 0)




```



# Figure 2

Boxplots of DP vs DC expected counts in MA and GA
Changes to the current figure:
-Use the “Set1” color palette from Rcolorbrewer
-Make the plots with facet_wrap() so that there is just one legend and use same y-axis scale for both (in response to Reviewer 1 comment to use consistent scales)

```{r coef percent error}
# percent error = (coef - beta)/beta * 100%
## coef percent error##
# ce_sim <- ce_sim %>% mutate(beta0_pe = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
# dp_sim <- dp_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
# dp0527_sim <- dp0527_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01)
# dp22_sim <- dp22_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 
# dp0825_sim <- dp0825_sim %>% mutate(beta0_bias = beta0 - 0) %>% mutate(beta1_bias = beta1 - 0.4) %>% mutate(beta2_bias = beta2 - 0.01) 

# box_compare1 <- ce_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
# #box_compare1 <- ce_sim[,c("beta0", "beta1", "beta2")] 
# box_compare1 <- box_compare1 %>% mutate(Data="DC") 
# #box_compare2 <- dp_sim[,c("beta0", "beta1", "beta2")] 
# box_compare2 <- dp_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
# box_compare2 <- box_compare2 %>% mutate(Data="DP19")
# box_compare3 <- dp0527_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
# #box_compare3 <- dp0527_sim[,c("beta0", "beta1", "beta2")] 
# box_compare3 <- box_compare3 %>% mutate(Data="DP20")
# box_compare4 <- dp22_sim[,c("beta0_bias", "beta1_bias", "beta2_bias")] 
# #box_compare3 <- dp0527_sim[,c("beta0", "beta1", "beta2")] 
# box_compare4 <- box_compare4 %>% mutate(Data="DP22")
# box_compare <- melt(rbind(box_compare1, box_compare2, box_compare3, box_compare4))



summary_text <- adat %>% mutate(DP19 = (exp_counts_dp-exp_counts_ce)/exp_counts_ce) %>% mutate(DP20 = (exp_counts_dp0527-exp_counts_ce)/exp_counts_ce) %>% mutate(DP22 = (exp_counts_dp0825-exp_counts_ce)/exp_counts_ce)

summary_text_ga <- adat_ga %>% mutate(DP19 = (exp_counts_dp-exp_counts_ce)/exp_counts_ce) %>% mutate(DP20 = (exp_counts_dp0527-exp_counts_ce)/exp_counts_ce) %>% mutate(DP22 = (exp_counts_dp0825-exp_counts_ce)/exp_counts_ce)

box_pe <- melt(summary_text[,c("race", "DP19", "DP20","DP22")])

box_pe$race<-mapvalues(box_pe$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))

colnames(box_pe) <- c("race", "Data", "value")

box_pe <- box_pe %>% mutate(state = "MA")

box_pe_ga <- melt(summary_text_ga[,c("race", "DP19", "DP20","DP22")])

box_pe_ga$race<-mapvalues(box_pe_ga$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))

colnames(box_pe_ga) <- c("race", "Data", "value")

box_pe_ga <- box_pe_ga %>% mutate(state = "GA")

box_pe <- rbind(box_pe, box_pe_ga)
# fig1 <- ggplot(box_pe, aes(x=race,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+ggtitle("")+ geom_boxplot(outlier.shape = NA)+ coord_cartesian(ylim=c(-1.2, 1.8))+ ylab(NULL) 

# 对 state 变量因子化并按照需要的顺序排序
box_pe$state <- factor(box_pe$state, levels = c("MA", "GA"))

fig1_percent_scale <- ggplot(box_pe, aes(x=race,y=value, fill=Data)) + xlab(NULL) + ylab(NULL)+ggtitle("")+
 facet_wrap(~state)+theme_linedraw()+ 
   geom_boxplot(outlier.shape = NA)+ 
scale_y_continuous(labels = percent_format(), limits = c(-1.03, 1.2))+
  theme(strip.text = element_blank(),
        #strip.text = element_text(size = 18),
    #strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  #scale_fill_brewer(palette = "Set1")
  scale_fill_manual(values=brewer.pal(n = 4, name = "Set1")[-1], na.value = "white")+ 
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")
fig1_percent_scale
```






### expected counts scatter plots

```{r scatter plot dp/dp0527 P vs ce Ptrue}
Ptrue <- matrix(adat$exp_counts_ce)
P_dp <- matrix(adat$exp_counts_dp)
colnames(P_dp) <- "DP19"
P_dp0527 <- matrix(adat$exp_counts_dp0527)
colnames(P_dp0527) <- c("DP20")
# P_dp22 <- matrix(adat$exp_counts_dp22)
# colnames(P_dp22) <- c("DP22")
P_dp22 <- matrix(adat$exp_counts_dp0825)
colnames(P_dp22) <- c("DP22")

P2_w_GEOID <- cbind(adat[1:2], P_dp, P_dp0527, P_dp22, Ptrue) 

P_dp_ce <- melt(P2_w_GEOID, id=c("GEOID", "race", "Ptrue"))
names(P_dp_ce)[3:4] <- c("DC", "Data")

P_dp_ce$race<-mapvalues(P_dp_ce$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))
P_dp_ce$race <- factor(P_dp_ce$race, levels = c('NHW','Black'))

P_dp_ce_scatter <- ggplot(data = P_dp_ce, mapping = aes(x = DC, y = value, color=Data)) + geom_point(alpha=1) +
  coord_cartesian(xlim=c(0, 36),ylim=c(0, 36))+ theme(strip.text.x = element_text(size=12),strip.background = element_rect(fill = "white", colour = NA))+
  ylab("Expected DP counts") + xlab("Expected DC counts")+
   scale_colour_manual(values=brewer.pal(n = 4, name = "Set1")[-1], na.value = "white")
scatter_exp <- P_dp_ce_scatter +theme_linedraw()+ geom_abline(colour="black", linetype ="dashed")+
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        #strip.text = element_text(family = NULL,face = NULL,colour = NULL,size = 12,hjust = NULL,vjust = NULL),
        strip.text.x.top= element_text(size=12),
        #strip.text.x = element_text(size=12),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) + facet_wrap(~race) 

#############
## GA #######
#############

Ptrue_ga <- matrix(adat_ga$exp_counts_ce)
P_dp_ga <- matrix(adat_ga$exp_counts_dp)
colnames(P_dp_ga) <- "DP19"
P_dp0527_ga <- matrix(adat_ga$exp_counts_dp0527)
colnames(P_dp0527_ga) <- c("DP20")
P_dp22_ga <- matrix(adat_ga$exp_counts_dp0825)
colnames(P_dp22_ga) <- c("DP22")

P2_w_GEOID_ga <- cbind(adat_ga[1:2], P_dp_ga, P_dp0527_ga, P_dp22_ga, Ptrue_ga) 

P_dp_ce_ga <- melt(P2_w_GEOID_ga, id=c("GEOID", "race", "Ptrue_ga"))
names(P_dp_ce_ga)[3:4] <- c("DC", "Data")

P_dp_ce_ga$race<-mapvalues(P_dp_ce_ga$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))
P_dp_ce_ga$race <- factor(P_dp_ce_ga$race, levels = c('NHW','Black'))

P_dp_ce_scatter_ga <- ggplot(data = P_dp_ce_ga, mapping = aes(x = DC, y = value, color=Data)) + geom_point(alpha=1) + 
  coord_cartesian(xlim=c(0, 36),ylim=c(0, 36))+
   scale_colour_manual(values=brewer.pal(n = 4, name = "Set1")[-1], na.value = "white")
scatter_exp_ga <- P_dp_ce_scatter_ga +theme_linedraw()+ facet_wrap(~race) + geom_abline(colour="black", linetype ="dashed")+
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank(),
        strip.text = element_text(family = NULL,face = NULL,colour = NULL,size = 12,hjust = NULL,vjust = NULL),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "none", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent')) 

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  scatter_exp+ theme(legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)))

scatter2 <- plot_grid(scatter_exp+theme(legend.position = "none"),
                      scatter_exp_ga,legend, nrow=3, rel_heights = c(10,10,1))


```

Average difference of expected counts

```{r}
mean(P_dp-Ptrue)
mean(P_dp0527-Ptrue)
mean(P_dp22-Ptrue)
# mean and standard deviation of the differences in the CT-level DP and DC expected counts for the NHW population are XX for DP19

summary_text <- adat %>% mutate(diff19 = exp_counts_dp-exp_counts_ce) %>% mutate(diff20 = exp_counts_dp0527-exp_counts_ce)%>% mutate(diff22 = exp_counts_dp0825-exp_counts_ce)

diff19_w <- summary_text[summary_text$race=="white",]$diff19
diff19_b <- summary_text[summary_text$race=="black",]$diff19

diff20_w <- summary_text[summary_text$race=="white",]$diff20
diff20_b <- summary_text[summary_text$race=="black",]$diff20

diff22_w <- summary_text[summary_text$race=="white",]$diff22
diff22_b <- summary_text[summary_text$race=="black",]$diff22

huizong = function(d){
  hz <- c(mean(d),sd(d))
  return((hz))
 }

huizong(diff19_w)
huizong(diff19_b)
huizong(diff20_w)
huizong(diff20_b)
huizong(diff22_w)
huizong(diff22_b)


ga_summary_text <- adat_ga %>% mutate(diff19 = exp_counts_dp-exp_counts_ce) %>% mutate(diff20 = exp_counts_dp0527-exp_counts_ce)%>% mutate(diff22 = exp_counts_dp0825-exp_counts_ce)

diff19_w <- ga_summary_text[ga_summary_text$race=="white",]$diff19
diff19_b <- ga_summary_text[ga_summary_text$race=="black",]$diff19

diff20_w <- ga_summary_text[ga_summary_text$race=="white",]$diff20
diff20_b <- ga_summary_text[ga_summary_text$race=="black",]$diff20

diff22_w <- ga_summary_text[ga_summary_text$race=="white",]$diff22
diff22_b <- ga_summary_text[ga_summary_text$race=="black",]$diff22

huizong = function(d){
  hz <- c(mean(d),sd(d))
  return((hz))
 }
print("GA result")
huizong(diff19_w)
huizong(diff19_b)
huizong(diff20_w)
huizong(diff20_b)
huizong(diff22_w)
huizong(diff22_b)



```


```{r percent error}
# percent error = (expectedDP - expectedDC)/expectedDC * 100%
summary_text <- adat %>% mutate(DP19 = (exp_counts_dp-exp_counts_ce)/exp_counts_ce) %>% mutate(DP20 = (exp_counts_dp0527-exp_counts_ce)/exp_counts_ce) %>%  mutate(DP22 = (exp_counts_dp0825-exp_counts_ce)/exp_counts_ce)

pe19_w <- summary_text[summary_text$race=="white",]$DP19
pe19_b <- summary_text[summary_text$race=="black",]$DP19

pe20_w <- summary_text[summary_text$race=="white",]$DP20
pe20_b <- summary_text[summary_text$race=="black",]$DP20

pe22_w <- summary_text[summary_text$race=="white",]$DP22
pe22_b <- summary_text[summary_text$race=="black",]$DP22


summary_pe_under <- rbind(mean(pe19_w<0), mean(pe19_b<0))%>% cbind( rbind(mean(pe20_w<0), mean(pe20_b<0))) %>% cbind( rbind(mean(pe22_w<0), mean(pe22_b<0)))
summary_pe_under <- summary_pe_under*100
colnames(summary_pe_under)<-c("DP19", "DP20", "DP22")
rownames(summary_pe_under) <- c("NHW", "Black")
xtable(summary_pe_under,digits = 4)

############
### GA #####
############


# percent error = (expectedDP - expectedDC)/expectedDC * 100%
ga_summary_text <- adat_ga %>% mutate(DP19 = (exp_counts_dp-exp_counts_ce)/exp_counts_ce) %>% mutate(DP20 = (exp_counts_dp0527-exp_counts_ce)/exp_counts_ce) %>%  mutate(DP22 = (exp_counts_dp0825-exp_counts_ce)/exp_counts_ce)

pe19_w <- ga_summary_text[ga_summary_text$race=="white",]$DP19
pe19_b <- ga_summary_text[ga_summary_text$race=="black",]$DP19

pe20_w <- ga_summary_text[ga_summary_text$race=="white",]$DP20
pe20_b <- ga_summary_text[ga_summary_text$race=="black",]$DP20

pe22_w <- ga_summary_text[ga_summary_text$race=="white",]$DP22
pe22_b <- ga_summary_text[ga_summary_text$race=="black",]$DP22


summary_pe_under_ga <- rbind(mean(pe19_w<0), mean(pe19_b<0))%>% cbind( rbind(mean(pe20_w<0), mean(pe20_b<0))) %>% cbind( rbind(mean(pe22_w<0), mean(pe22_b<0)))
summary_pe_under_ga <- summary_pe_under_ga*100
colnames(summary_pe_under_ga)<-c("DP19", "DP20", "DP22")
rownames(summary_pe_under_ga) <- c("NHW", "Black")
xtable(summary_pe_under_ga,digits = 4)


```




## boxplot of comparasion for the SMR estimates using the census denominators with the true SMRs

```{r sim_from_cluster2}
load("../results_manu/fitted_value3.RData")
dp0527_fit <- fitted_values
dp0527_fit_mean <- apply(dp0527_fit, 2, mean)
load("../results_manu/fitted_value2.RData")
dp_fit <- fitted_values
dp_fit_mean <- apply(dp_fit, 2, mean)
load("../results_manu/fitted_value10.RData")
ce_fit <- fitted_values
ce_fit_mean <- apply(ce_fit, 2, mean)
load("../results_manu/fitted_value5.RData")
dp2208_fit <- fitted_values
dp2208_fit_mean <- apply(dp2208_fit, 2, mean)

load("../results_manu/exp_theta3.RData")
true_smrs_dp20 = exp_theta
load("../results_manu/exp_theta2.RData")
true_smrs_dp19 = exp_theta
load("../results_manu/exp_theta10.RData")
true_smrs_ce = exp_theta
load("../results_manu/exp_theta5.RData")
true_smrs_dp2208 = exp_theta

# old dp
exp_counts_dp19 = data.frame(t(matrix(rep(adat$exp_counts_dp,100),c(length(adat$exp_counts_dp),100))))
smrdp19 = dp_fit/exp_counts_dp19
mape_ij_dp19 = colSums(abs((smrdp19-true_smrs_dp19)/true_smrs_dp19))/100
# 2020-05-27 dp
exp_counts_dp20 = data.frame(t(matrix(rep(adat$exp_counts_dp0527,100),c(length(adat$exp_counts_dp0527),100))))
smrdp20 = dp0527_fit/exp_counts_dp20
mape_ij_dp20 = colSums(abs((smrdp20-true_smrs_dp20)/true_smrs_dp20))/100
# ce
exp_counts_ce = data.frame(t(matrix(rep(adat$exp_counts_ce,100),c(length(adat$exp_counts_ce),100))))
smrce = ce_fit/exp_counts_ce
mape_ij_ce = colSums(abs((smrce-true_smrs_ce)/true_smrs_ce))/100
# 2022-08-25 dp
exp_counts_dp2208 = data.frame(t(matrix(rep(adat$exp_counts_dp0825,100),c(length(adat$exp_counts_dp0825),100))))
smrdp2208 = dp2208_fit/exp_counts_dp2208
mape_ij_dp2208 = colSums(abs((smrdp2208-true_smrs_dp2208)/true_smrs_dp2208))/100


```



```{r MAPE boxplot2}
# looking at boxplots of the mean absolute percentage error of the SMRs for each CT/race group (relative to ce)
smr_mape <- cbind(adat[1:2], mape_ij_ce, mape_ij_dp19, mape_ij_dp20, mape_ij_dp2208) 
smr_mape_box <- smr_mape[,c("GEOID", "race", "mape_ij_ce","mape_ij_dp19","mape_ij_dp20", "mape_ij_dp2208")]
names(smr_mape_box) <- c("GEOID", "race", "DC","DP19","DP20", "DP22")

melt_mape <- melt(smr_mape_box)
names(melt_mape)<-c("GEOID","race","Data","value")

melt_mape$race<-mapvalues(melt_mape$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))


smr_mape_bw_woo <- ggplot(melt_mape, aes(x=race,y=value, fill=Data)) + 
    geom_boxplot(outlier.shape = NA)+ ylab(NULL)+ xlab(NULL)+
scale_y_continuous(labels = percent_format(), limits = c(0, 1.2))+ggtitle("")+
  theme_linedraw()+
  theme(strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  scale_fill_brewer(palette = "Set1")+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")

# old dp
bias_ij_dp19 = colSums(smrdp19-true_smrs_dp19)/100
# 2020-05-27 dp
bias_ij_dp20 = colSums(smrdp20-true_smrs_dp20)/100
# 2022-08-25 dp
bias_ij_dp2208 = colSums(smrdp2208-true_smrs_dp2208)/100
# ce
bias_ij_ce = colSums(smrce-true_smrs_ce)/100

# looking at boxplots of the mean absolute percentage error of the SMRs for each CT/race group (relative to ce)
smr_bias <- cbind(adat[1:2], bias_ij_ce, bias_ij_dp19, bias_ij_dp20, bias_ij_dp2208) 
smr_bias_box <- smr_bias[,c("GEOID", "race", "bias_ij_ce","bias_ij_dp19","bias_ij_dp20", "bias_ij_dp2208")]
names(smr_bias_box) <- c("GEOID", "race", "DC","DP19","DP20", "DP22")
melt_bias = melt(smr_bias_box)

melt_bias$race<-mapvalues(melt_bias$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))
names(melt_bias)<-c("GEOID","race","Data","value")


smr_bias_bw_woo <- ggplot(melt_bias, aes(x=race,y=value, fill=variable)) + 
    geom_boxplot(outlier.shape = NA)+ coord_cartesian(ylim=c(-0.5, 0.8))+ 
  theme_linedraw()+
  xlab(NULL)+ylab(NULL)+
  theme(strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  scale_fill_brewer(palette = "Set1")+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")

require(cowplot)
library(grid)
library(gridExtra)

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  smr_mape_bw_woo+ theme(legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)))

# prow <- plot_grid(
#   smr_bias_bw_woo+ theme(legend.position="none"),
#   smr_mape_bw_woo+ theme(legend.position="none"),
#   align = 'vh',
#   labels = c("Bias", "MAPE"),
#   label_x = 0.5,
#   #hjust = -1,
#   nrow = 1
# )


```


```{r GA bias_mape boxplot}
load("../results_manu/GA/fitted_value3.RData")
ga_dp0527_fit <- fitted_values
ga_dp0527_fit_mean <- apply(ga_dp0527_fit, 2, mean)
load("../results_manu/GA/fitted_value2.RData")
ga_dp_fit <- fitted_values
ga_dp_fit_mean <- apply(ga_dp_fit, 2, mean)
load("../results_manu/GA/fitted_value1.RData")
ga_ce_fit <- fitted_values
ga_ce_fit_mean <- apply(ga_ce_fit, 2, mean)
load("../results_manu/GA/fitted_value5.RData")
ga_dp2208_fit <- fitted_values
ga_dp2208_fit_mean <- apply(ga_dp2208_fit, 2, mean)

load("../results_manu/GA/exp_theta3.RData")
ga_true_smrs_dp20 = exp_theta
load("../results_manu/GA/exp_theta2.RData")
ga_true_smrs_dp19 = exp_theta
load("../results_manu/GA/exp_theta1.RData")
ga_true_smrs_ce = exp_theta
load("../results_manu/GA/exp_theta5.RData")
ga_true_smrs_dp2208 = exp_theta

# old dp
exp_counts_dp19 = data.frame(t(matrix(rep(adat_ga$exp_counts_dp,100),c(length(adat_ga$exp_counts_dp),100))))
smrdp19 = ga_dp_fit/exp_counts_dp19
ga_mape_ij_dp19 = colSums(abs((smrdp19-ga_true_smrs_dp19)/ga_true_smrs_dp19))/100
# 2020-05-27 dp
exp_counts_dp20 = data.frame(t(matrix(rep(adat_ga$exp_counts_dp0527,100),c(length(adat_ga$exp_counts_dp0527),100))))
smrdp20 = ga_dp0527_fit/exp_counts_dp20
ga_mape_ij_dp20 = colSums(abs((smrdp20-ga_true_smrs_dp20)/ga_true_smrs_dp20))/100
# ce
exp_counts_ce = data.frame(t(matrix(rep(adat_ga$exp_counts_ce,100),c(length(adat_ga$exp_counts_ce),100))))
smrce = ga_ce_fit/exp_counts_ce
ga_mape_ij_ce = colSums(abs((smrce-ga_true_smrs_ce)/ga_true_smrs_ce))/100
# 2022-08-25 dp
exp_counts_dp2208 = data.frame(t(matrix(rep(adat_ga$exp_counts_dp0825,100),c(length(adat_ga$exp_counts_dp0825),100))))
smrdp2208 = ga_dp2208_fit/exp_counts_dp2208
ga_mape_ij_dp2208 = colSums(abs((smrdp2208-ga_true_smrs_dp2208)/ga_true_smrs_dp2208))/100

# looking at boxplots of the mean absolute percentage error of the SMRs for each CT/race group (relative to ce)
ga_smr_mape <- cbind(adat_ga[1:2], ga_mape_ij_ce, ga_mape_ij_dp19, ga_mape_ij_dp20,  ga_mape_ij_dp2208) 
ga_smr_mape_box <- ga_smr_mape[,c("GEOID", "race", "ga_mape_ij_ce","ga_mape_ij_dp19","ga_mape_ij_dp20", "ga_mape_ij_dp2208")]
names(ga_smr_mape_box) <- c("GEOID", "race", "DC","DP19","DP20", "DP22")

ga_melt_mape <- melt(ga_smr_mape_box)
names(ga_melt_mape)<-c("GEOID","race","Data","value")

ga_melt_mape$race<-mapvalues(ga_melt_mape$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))

melt_mape$state <- "MA"
ga_melt_mape$state <- "GA"
melt_mape_2state <- melt(rbind(melt_mape, ga_melt_mape))
melt_mape_2state$state <- factor(melt_mape_2state$state, levels = c("MA", "GA"))
smr_mape_bw_2 <- ggplot(melt_mape_2state, aes(x=race,y=value, fill=Data)) +
    geom_boxplot(outlier.shape = NA)+ ylab(NULL)+ xlab(NULL)+ theme_linedraw()+
    scale_y_continuous(labels = percent_format(), limits = c(0, 1.2))+ggtitle("")+
    theme(strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom",
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
    scale_fill_brewer(palette = "Set1")+ geom_hline(yintercept = 0, color = "black", linetype =   "dashed") +        
    facet_wrap(~state)

# ga_smr_mape_bw_woo <- ggplot(ga_melt_mape, aes(x=race,y=value, fill=Data)) + 
#     geom_boxplot(outlier.shape = NA)+ ylab(NULL)+ xlab(NULL)+
#   theme_linedraw()+
# scale_y_continuous(labels = percent_format(), limits = c(0, 1.2))+ggtitle("")+
#   theme(strip.background = element_rect(fill = "white", colour = NA),
#         panel.background = element_rect(fill = "white", colour = NA),
#     legend.direction = "horizontal",
#     axis.title.y=element_blank(),
#     axis.ticks.y=element_blank(),axis.text.y=element_blank(),
#             legend.background = element_rect(fill = "white", colour = NA),
#             legend.position = "bottom", 
#             legend.key = element_rect(fill = "white", colour = NA),
#             panel.grid.major = element_line(colour = 'transparent'),
#         panel.grid.minor = element_line(colour = 'transparent'),
#         legend.box.background = element_rect(fill = "white", colour = NA))+
#   scale_fill_brewer(palette = "Set1")+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")


# old dp
ga_bias_ij_dp19 = colSums(smrdp19-ga_true_smrs_dp19)/100
# 2020-05-27 dp
ga_bias_ij_dp20 = colSums(smrdp20-ga_true_smrs_dp20)/100
# 2022-08-25 dp
ga_bias_ij_dp2208 = colSums(smrdp2208-ga_true_smrs_dp2208)/100
# ce
ga_bias_ij_ce = colSums(smrce-ga_true_smrs_ce)/100

# looking at boxplots of the mean absolute percentage error of the SMRs for each CT/race group (relative to ce)
ga_smr_bias <- cbind(adat_ga[1:2], ga_bias_ij_ce, ga_bias_ij_dp19, ga_bias_ij_dp20, ga_bias_ij_dp2208) 
ga_smr_bias_box <- ga_smr_bias[,c("GEOID", "race", "ga_bias_ij_ce","ga_bias_ij_dp19","ga_bias_ij_dp20", "ga_bias_ij_dp2208")]
names(ga_smr_bias_box) <- c("GEOID", "race", "DC","DP19","DP20", "DP22")


ga_melt_bias = melt(ga_smr_bias_box)
ga_melt_bias$race<-mapvalues(ga_melt_bias$race,
                         from=c('white','black'),
                         to=c('NHW','Black'))
names(ga_melt_bias)<-c("GEOID","race","Data","value")


ga_smr_bias_bw_woo <- ggplot(ga_melt_bias, aes(x=race,y=value, fill=variable)) + 
    geom_boxplot(outlier.shape = NA)+ coord_cartesian(ylim=c(-0.5, 0.8))+ xlab(NULL)+ylab(NULL)+
  theme_linedraw()+
  theme(strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  scale_fill_brewer(palette = "Set1")+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")



melt_bias$state <- "MA"
ga_melt_bias$state <- "GA"
melt_bias_2state <- melt(rbind(melt_bias, ga_melt_bias))
melt_bias_2state$state <- factor(melt_bias_2state$state, levels = c("MA", "GA"))
smr_bias_bw_2 <- ggplot(melt_bias_2state, aes(x=race,y=value, fill=Data)) + 
    geom_boxplot(outlier.shape = NA)+ coord_cartesian(ylim=c(-0.5, 0.8))+ xlab(NULL)+ylab(NULL)+
  theme_linedraw()+
  theme(strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  scale_fill_brewer(palette = "Set1")+ geom_hline(yintercept = 0, color = "black", linetype = "dashed")+facet_wrap(~state)


# prow_ga <- plot_grid(
#   ga_smr_bias_bw_woo+ theme(legend.position="none"),
#   ga_smr_mape_bw_woo+ theme(legend.position="none"),
#   align = 'vh',
#   #labels = c("Bias", "MAPE"),
#   hjust = -1,
#   nrow = 1)
aligned_plots <- align_plots( smr_bias_bw_2+ theme(panel.spacing.y = unit(0, "cm"),legend.position="none"),
                               smr_mape_bw_2+ theme(panel.spacing.y = unit(0, "cm"),legend.position="none"), align = "v", axis = "y")

smr_mape_bias_t <-plot_grid(aligned_plots[[1]], aligned_plots[[2]], legend, ncol = 1, rel_heights = c(10,10,1))
# smr_mape_bias <-cowplot::plot_grid(prow,prow_ga, legend, ncol=1, rel_heights = c(10,10,1))
# smr_mape_bias
```




```{r appendix tables}
bias10_w <- smr_bias[smr_bias$race=="white",]$bias_ij_ce
bias10_b <- smr_bias[smr_bias$race=="black",]$bias_ij_ce

bias19_w <- smr_bias[smr_bias$race=="white",]$bias_ij_dp19
bias19_b <- smr_bias[smr_bias$race=="black",]$bias_ij_dp19

bias20_w <- smr_bias[smr_bias$race=="white",]$bias_ij_dp20
bias20_b <- smr_bias[smr_bias$race=="black",]$bias_ij_dp20

bias22_w <- smr_bias[smr_bias$race=="white",]$bias_ij_dp22
bias22_b <- smr_bias[smr_bias$race=="black",]$bias_ij_dp22

bias2208_w <- smr_bias[smr_bias$race=="white",]$bias_ij_dp2208
bias2208_b <- smr_bias[smr_bias$race=="black",]$bias_ij_dp2208

huizong(bias10_w)
huizong(bias10_b)
huizong(bias19_w)
huizong(bias19_b)
huizong(bias20_w)
huizong(bias20_b)
huizong(bias22_w)
huizong(bias22_b)
huizong(bias2208_w)
huizong(bias2208_b)

# percentage of upward biases
upward_bias <- rbind(mean(bias10_w>0), mean(bias10_b>0))%>% cbind( rbind(mean(bias19_w>0), mean(bias19_b>0)))%>% cbind( rbind(mean(bias20_w>0), mean(bias20_b>0))) %>% cbind( rbind(mean(bias22_w>0), mean(bias22_b>0)))%>% cbind( rbind(mean(bias2208_w>0), mean(bias2208_b>0)))
upward_bias <- upward_bias*100
colnames(upward_bias)<-c("DC","DP19", "DP20", "DP22", "DP2208")
rownames(upward_bias) <- c("NHW", "Black")
xtable(upward_bias,digits = 4)

# average biases
avg_bias <- rbind(mean(bias10_w), mean(bias10_b))%>% cbind( rbind(mean(bias19_w), mean(bias19_b)))%>% cbind( rbind(mean(bias20_w), mean(bias20_b))) %>% cbind( rbind(mean(bias22_w), mean(bias22_b)))%>% cbind( rbind(mean(bias2208_w), mean(bias2208_b)))

colnames(avg_bias)<-c("DC","DP19", "DP20", "DP22", "DP2208")
rownames(avg_bias) <- c("NHW", "Black")
xtable(avg_bias,digits = 4)

# average mapes

mape10_w <- smr_mape[smr_mape$race=="white",]$mape_ij_ce
mape10_b <- smr_mape[smr_mape$race=="black",]$mape_ij_ce

mape19_w <- smr_mape[smr_mape$race=="white",]$mape_ij_dp19
mape19_b <- smr_mape[smr_mape$race=="black",]$mape_ij_dp19

mape20_w <- smr_mape[smr_mape$race=="white",]$mape_ij_dp20
mape20_b <- smr_mape[smr_mape$race=="black",]$mape_ij_dp20

mape22_w <- smr_mape[smr_mape$race=="white",]$mape_ij_dp22
mape22_b <- smr_mape[smr_mape$race=="black",]$mape_ij_dp22

mape2208_w <- smr_mape[smr_mape$race=="white",]$mape_ij_dp2208
mape2208_b <- smr_mape[smr_mape$race=="black",]$mape_ij_dp2208

huizong(mape10_w)
huizong(mape10_b)
huizong(mape19_w)
huizong(mape19_b)
huizong(mape20_w)
huizong(mape20_b)
huizong(mape22_w)
huizong(mape22_b)
huizong(mape2208_w)
huizong(mape2208_b)

avg_mape <- rbind(mean(mape10_w), mean(mape10_b))%>% cbind( rbind(mean(mape19_w), mean(mape19_b)))%>% cbind( rbind(mean(mape20_w), mean(mape20_b))) %>% cbind( rbind(mean(mape22_w), mean(mape22_b)))%>% cbind( rbind(mean(mape2208_w), mean(mape2208_b)))

colnames(avg_mape)<-c("DC","DP19", "DP20", "DP22", "DP2208")
rownames(avg_mape) <- c("NHW", "Black")
xtable(avg_mape,digits = 4)


```

### maps of MAPE for 2020-05-27 version

```{r}
#### MAPE0527 plot
load('../bw_adat.RData')
adat <- cbind(adat, mape_ij_dp20)[,c(1, 2, 8)] #2877*4


## put in wide format to merge with the shapefile ##
adat_wide<-merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7

## extract CT shapefile ##
ma_shp<-tracts(state = 'MA',year=2010)

## merge with pop and rate data ##
ma_shp<-merge(ma_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ma_shp<-st_as_sf(ma_shp)  #1478*21


shp_plot0527<-gather(ma_shp,'race','fit0527_mape',
                 c(mape_ij_dp20_w,mape_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('mape_ij_dp20_w','mape_ij_dp20_b'),
                         to=c('NHW','Black'))
#quantile(shp_plot0527$fit0527_mape, na.rm = T)
# 0.1589031 0.3059507 0.5038545 0.6941363 1.7476195 
ma_q<-c(0,0.21,0.466, 0.722, 0.978,1.234,1.49,max(shp_plot0527$fit0527_mape,na.rm = T)+1)


shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_mape,breaks=ma_q,
                             labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F)

## race-stratified expected counts, MA ##
ma_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "YlOrRd", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_text(size=18, face="bold"),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)
ma_maps0527

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  ma_maps0527+ guides(fill = guide_legend(nrow = 1)) + 
         #  scale_fill_discrete(na.translate = F)+
    theme(legend.direction = "horizontal",
          legend.key.size = unit(.5, 'cm'),
          legend.text = element_text(size=10),
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)))


## read in boston shapefiles ##
bos_shp<-st_read('../boston_ct/Census_2010_Tracts.shp')
bos_shp<-merge(bos_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

bos_bbox<-as.numeric(st_bbox(bos_shp))

xlim<-c(bos_bbox[1], bos_bbox[1]+.4*(bos_bbox[3]-bos_bbox[1]))
ylim<-c(bos_bbox[2], bos_bbox[2]+.8*(bos_bbox[4]-bos_bbox[2]))

## remove CTs with zero population (these are the islands off the MA coast) ##
#bos_shp<-bos_shp[which(bos_shp$exp_counts_ce_w!="NA" | bos_shp$exp_counts_ce_b!="NA"),]

## long form by race ##
shp_plot0527<-gather(bos_shp,'race','fit0527_mape',
                 c(mape_ij_dp20_w,mape_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('mape_ij_dp20_w','mape_ij_dp20_b'),
                         to=c('NHW','Black'))

shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_mape,breaks=ma_q,
   labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F,levels=levels)

## race-stratified expected counts, BOS ##  
bos_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  #scale_fill_brewer(palette = "YlOrRd", na.value = "white")+
  scale_fill_manual(values=brewer.pal(n = 7, name = "YlOrRd")[-1], na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
       legend.background = element_rect(fill = "white", colour = NA),
       legend.position = "bottom", 
       legend.key = element_rect(fill = "white", colour = NA),
       panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

m0527<-cowplot::plot_grid(ma_maps0527+ theme(legend.position="none"),
                          bos_maps0527+ theme(legend.position="none"),
                          legend,
                          ncol=1, 
                           rel_heights = c(2,2,.1))

```

## maps of MAPE for 2020-05-27 version 
### GA

```{r GA MAPE0527 plot}

load("../GA/sim/bw_adat5.RData")
adat_ga <- adat
adat_ga <- cbind(adat_ga, ga_mape_ij_dp20)[,c(1, 2, 8)] #2877*4
# 
# 
# ## put in wide format to merge with the shapefile ##
adat_wide_ga<-merge(adat_ga[which(adat_ga$race=='white'),],adat_ga[which(adat_ga$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7
## extract CT shapefile ##
ga_shp<-tracts(state = 'GA',year=2010)

## merge with pop and rate data ##
ga_shp<-merge(ga_shp,adat_wide_ga,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ga_shp<-st_as_sf(ga_shp)  # 1969 *19


shp_plot0527<-gather(ga_shp,'race','fit0527_mape',
                 c(ga_mape_ij_dp20_w,ga_mape_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('ga_mape_ij_dp20_w','ga_mape_ij_dp20_b'),
                         to=c('NHW','Black'))
#quantile(shp_plot0527$fit0527_mape, na.rm = T)
# 0.1589031 0.3059507 0.5038545 0.6941363 1.7476195 
#ma_q<-c(0,0.21,0.466, 0.722, 0.978,1.234,1.49,max(shp_plot0527$fit0527_mape,na.rm = T)+1)
#ga_q<-c(0,0.130, 0.297, 0.464, 0.631, 0.798,0.965,max(shp_plot0527$fit0527_mape,na.rm = T)+1)
ga_q = ma_q
shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_mape,breaks=ga_q,
                             labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F)

## race-stratified expected counts, MA ##
ga_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "YlOrRd", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)
# ga_maps0527
# 
# legend <- cowplot::get_legend(
#   # create some space to the left of the legend
#   ga_maps0527+ guides(fill = guide_legend(nrow = 1)) + 
#          #  scale_fill_discrete(na.translate = F)+
#     theme(legend.direction = "horizontal",
#           legend.key.size = unit(.5, 'cm'),
#           legend.text = element_text(size=10),
#             legend.background = element_rect(fill = "white", colour = NA),
#             legend.position = "bottom", 
#             legend.key = element_rect(fill = "white", colour = NA),
#             panel.grid.major = element_line(colour = 'transparent'),
#         panel.grid.minor = element_line(colour = 'transparent'),
#         legend.box.background = element_rect(fill = "white", colour = NA)))
# 

## read in boston shapefiles ##
fulton_shp<-tracts(state = 'GA', county =c("Fulton","Dekalb") , year=2010)

## merge with pop and rate data ##
fulton_shp<-merge(fulton_shp,adat_wide_ga,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
fulton_shp<-st_as_sf(fulton_shp)  #1478*21


## manually extract the NAME10 in Atlanta City ##
dekalb_name <- read.table("./shp_files/atl_ct/DC10CT_C13089_CT2MS.txt", header = TRUE,sep = ";")
#dekalb_name <- dekalb_name[c(1:11, 111:112,203),] 2020 DC map
dekalb_name <- dekalb_name[c(1:10),]

fulton_name <- read.table("./shp_files/atl_ct/DC10CT_C13121_CT2MS.txt", header = TRUE,sep = ";")
#fulton_name <- fulton_name[c(1:157,321:325),]
fulton_name <- fulton_name[c(1:114,135, 202,204),]



atl_name <- rbind(dekalb_name, fulton_name)

## remove CTs with zero population (these are the islands off the MA coast) ##
atl_shp<-fulton_shp[fulton_shp$GEOID10 %in% atl_name$CODE,]

atl_bbox<-as.numeric(st_bbox(atl_shp))

# xlim<-c(atl_bbox[1], atl_bbox[1]+.4*(atl_bbox[3]-atl_bbox[1]))
# ylim<-c(atl_bbox[2], atl_bbox[2]+.8*(atl_bbox[4]-atl_bbox[2]))

xlim<-c(atl_bbox[1], atl_bbox[3])
ylim<-c(atl_bbox[2], atl_bbox[4])


## long form by race ##
shp_plot0527<-gather(atl_shp,'race','ga_fit0527_mape',
                 c(ga_mape_ij_dp20_w,ga_mape_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('ga_mape_ij_dp20_w','ga_mape_ij_dp20_b'),
                         to=c('NHW','Black'))

shp_plot0527$exp_factor<-cut(shp_plot0527$ga_fit0527_mape,breaks=ma_q,
   labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F,levels=levels)

## race-stratified expected counts, BOS ##  
atl_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  #scale_fill_brewer(palette = "YlOrRd", na.value = "white")+
  scale_fill_manual(values=brewer.pal(n = 7, name = "YlOrRd")[-1], na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
       legend.background = element_rect(fill = "white", colour = NA),
       legend.position = "bottom", 
       legend.key = element_rect(fill = "white", colour = NA),
       panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

m0527<-cowplot::plot_grid(ma_maps0527+ theme(legend.position="none"),
                          bos_maps0527+ theme(legend.position="none"),
                          ga_maps0527+ theme(legend.position="none"),
                          atl_maps0527+ theme(legend.position="none"),
                          legend,
                          ncol=1, 
                           rel_heights = c(3,2.5,3,2.5,.22))

```









### maps of bias for 2020-05-27 version

```{r}
#### bias0527 plot
coast <- c("9900", "9900.03", "9901", "9901.01")


load('../bw_adat5.RData')
adat <- cbind(adat, bias_ij_dp20)[,c(1, 2, 9)] #2877*4


## put in wide format to merge with the shapefile ##
adat_wide<-merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7

## extract CT shapefile ##
ma_shp<-tracts(state = 'MA',year=2010)

## merge with pop and rate data ##
ma_shp<-merge(ma_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ma_shp<-st_as_sf(ma_shp)  #1478*21

## remove CTs with zero population (these are the islands off the MA coast) ##
ma_shp<-ma_shp[!ma_shp$NAME10 %in% coast,]

shp_plot0527<-gather(ma_shp,'race','fit0527_bias',
                 c(bias_ij_dp20_w,bias_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('bias_ij_dp20_w','bias_ij_dp20_b'),
                         to=c('NHW','Black'))
# quantile(shp_plot0527$fit0527_bias, na.rm = T)
# -1.21238734 -0.06270235  0.03549275  0.14697380  7.08009385 
# sort(shp_plot0527$fit0527_bias, decreasing = TRUE)
#   [1] 7.08009385 1.78895720

ma_q<-c(min(shp_plot0527$fit0527_bias,na.rm = T)-1,-0.104,-0.062, -0.020, 0.021,0.063,0.105,max(shp_plot0527$fit0527_bias,na.rm = T)+1)


shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_bias,breaks=ma_q,
                              labels=c('<-0.104','[-0.104, -0.062)','[ -0.062,-0.020)','[-0.020,0.021)','[0.021,0.063)',
                                      '[0.063,0.105)','0.105+'),right=F)

## race-stratified expected counts, MA ##
ma_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "BrBG", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_text(size=18, face="bold"),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)
ma_maps0527


legend <- cowplot::get_legend(
  # create some space to the left of the legend
  ma_maps0527+ guides(fill = guide_legend(nrow = 1)) + 
         #  scale_fill_discrete(na.translate = F)+
    theme(legend.direction = "horizontal",
          legend.key.size = unit(.5, 'cm'),
          legend.text = element_text(size=10),
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)))




## read in boston shapefiles ##
bos_shp<-st_read('../boston_ct/Census_2010_Tracts.shp')
bos_shp<-merge(bos_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

bos_bbox<-as.numeric(st_bbox(bos_shp))

xlim<-c(bos_bbox[1], bos_bbox[1]+.4*(bos_bbox[3]-bos_bbox[1]))
ylim<-c(bos_bbox[2], bos_bbox[2]+.8*(bos_bbox[4]-bos_bbox[2]))

# remove CTs with zero population (these are the islands off the MA coast) ##
bos_shp<-bos_shp[!bos_shp$NAME10 %in% coast,]


## long form by race ##
shp_plot0527<-gather(bos_shp,'race','fit0527_bias',
                 c(bias_ij_dp20_w,bias_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('bias_ij_dp20_w','bias_ij_dp20_b'),
                         to=c('NHW','Black'))
# quantile(shp_plot0527$fit0527_bias, na.rm=TRUE)
# -0.88785804 -0.10397695  0.03460298  0.20137904  7.08009385

shp_plot0527$exp_factor<-cut(shp_plot0527$fit0527_bias,breaks=ma_q,
                             labels=c('<-0.104','[-0.104, -0.062)','[ -0.062,-0.020)','[-0.020,0.021)','[0.021,0.063)',
                                      '[0.063,0.105)','0.105+'),right=F)

## race-stratified expected counts, BOS ##  
bos_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  #scale_fill_brewer(palette = "YlOrRd", na.value = "white")+
  scale_fill_manual(values=brewer.pal(n = 7, name = "BrBG"), na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
       legend.background = element_rect(fill = "white", colour = NA),
       legend.position = "bottom", 
       legend.key = element_rect(fill = "white", colour = NA),
       panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

```




```{r GA bias map}
#### bias0527 plot
coast <- c("9900")
load("../GA/sim/bw_adat5.RData")

adat <- cbind(adat, ga_bias_ij_dp20)[,c(1, 2, 8)] #2877*4


## put in wide format to merge with the shapefile ##
adat_wide<-merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7

#  1418*7
## extract CT shapefile ##
ga_shp<-tracts(state = 'GA',year=2010)

## merge with pop and rate data ##
ga_shp<-merge(ga_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ga_shp<-st_as_sf(ga_shp)  # 1969 *19

## remove CTs with zero population (these are the islands off the MA coast) ##
ga_shp<-ga_shp[!ga_shp$NAME10 %in% coast,]

shp_plot0527<-gather(ga_shp,'race','ga_fit0527_bias',
                 c(ga_bias_ij_dp20_w,ga_bias_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('ga_bias_ij_dp20_w','ga_bias_ij_dp20_b'),
                         to=c('NHW','Black'))
ga_q = ma_q
shp_plot0527$exp_factor<-cut(shp_plot0527$ga_fit0527_bias,breaks=ga_q,
                              labels=c('<-0.104','[-0.104, -0.062)','[ -0.062,-0.020)','[-0.020,0.021)','[0.021,0.063)',
                                      '[0.063,0.105)','0.105+'),right=F)

## race-stratified expected counts, MA ##
ga_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "BrBG", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
    legend.direction = "horizontal",
            legend.background = element_rect(fill = "white", colour = NA),
            legend.position = "bottom", 
            legend.key = element_rect(fill = "white", colour = NA),
            panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)

 
## read in atlanta shapefiles ##
fulton_shp<-tracts(state = 'GA', county =c("Fulton","Dekalb") , year=2010)

## merge with pop and rate data ##
fulton_shp<-merge(fulton_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
fulton_shp<-st_as_sf(fulton_shp)  #1478*21


## manually extract the NAME10 in Atlanta City ##
dekalb_name <- read.table("./shp_files/atl_ct/DC10CT_C13089_CT2MS.txt", header = TRUE,sep = ";")
#dekalb_name <- dekalb_name[c(1:11, 111:112,203),] 2020 DC map
dekalb_name <- dekalb_name[c(1:10),]

fulton_name <- read.table("./shp_files/atl_ct/DC10CT_C13121_CT2MS.txt", header = TRUE,sep = ";")
#fulton_name <- fulton_name[c(1:157,321:325),]
fulton_name <- fulton_name[c(1:114,135, 202,204),]



atl_name <- rbind(dekalb_name, fulton_name)

## remove CTs with zero population (these are the islands off the MA coast) ##
atl_shp<-fulton_shp[fulton_shp$GEOID10 %in% atl_name$CODE,]

atl_bbox<-as.numeric(st_bbox(atl_shp))

# xlim<-c(atl_bbox[1], atl_bbox[1]+.4*(atl_bbox[3]-atl_bbox[1]))
# ylim<-c(atl_bbox[2], atl_bbox[2]+.8*(atl_bbox[4]-atl_bbox[2]))

xlim<-c(atl_bbox[1], atl_bbox[3])
ylim<-c(atl_bbox[2], atl_bbox[4])


## long form by race ##
shp_plot0527<-gather(atl_shp,'race','ga_fit0527_bias',
                 c(ga_bias_ij_dp20_w,ga_bias_ij_dp20_b),factor_key = T)

shp_plot0527$race<-mapvalues(shp_plot0527$race,
                         from=c('ga_bias_ij_dp20_w','ga_bias_ij_dp20_b'),
                         to=c('NHW','Black'))

shp_plot0527$exp_factor<-cut(shp_plot0527$ga_fit0527_bias,breaks=ga_q,
   labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F,levels=levels)

## race-stratified expected counts, BOS ##  
atl_maps0527<-ggplot(shp_plot0527, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  #scale_fill_brewer(palette = "YlOrRd", na.value = "white")+
  scale_fill_manual(values=brewer.pal(n = 7, name = "BrBG"), na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
       legend.background = element_rect(fill = "white", colour = NA),
       legend.position = "bottom", 
       legend.key = element_rect(fill = "white", colour = NA),
       panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)




b0527<-cowplot::plot_grid(ma_maps0527+ theme(legend.position="none"),
                          bos_maps0527+ theme(legend.position="none"),
                          ga_maps0527+ theme(legend.position="none"),
                          atl_maps0527+ theme(legend.position="none"),
                          legend,
                          ncol=1, 
                           rel_heights = c(3,2.5,3,2.5,.22))


```


## maps of MAPE for 2022-08-25 version 
### MA+GA

```{r MAPE0825 plot}
#### MAPE2208 plot
load('../bw_adat.RData')
adat <- cbind(adat, mape_ij_dp2208)[,c(1, 2, 8)] #2877*4


## put in wide format to merge with the shapefile ##
adat_wide<-merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7

## extract CT shapefile ##
ma_shp<-tracts(state = 'MA',year=2010)

## merge with pop and rate data ##
ma_shp<-merge(ma_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ma_shp<-st_as_sf(ma_shp)  #1478*21


shp_plot2208<-gather(ma_shp,'race','fit2208_mape',
                 c(mape_ij_dp2208_w,mape_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('mape_ij_dp2208_w','mape_ij_dp2208_b'),
                         to=c('NHW','Black'))

ma_q<-c(0,0.21,0.466, 0.722, 0.978,1.234,1.49,max(shp_plot2208$fit2208_mape,na.rm = T)+1)


shp_plot2208$exp_factor<-cut(shp_plot2208$fit2208_mape,breaks=ma_q,
                             labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F)

## race-stratified expected counts, MA ##
ma_maps2208<-ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "YlOrRd", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_text(size=18, face="bold"),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
   facet_wrap(~ race)
ma_maps2208

legend <- cowplot::get_legend(
  # create some space to the left of the legend
  ma_maps2208+ guides(fill = guide_legend(nrow = 1)) + 
         #  scale_fill_discrete(na.translate = F)+
    theme(legend.direction = "horizontal",
          legend.key.size = unit(.5, 'cm'),
          legend.text = element_text(size=10),
          legend.background = element_rect(fill = "white", colour = NA),
          legend.position = "bottom", 
          legend.key = element_rect(fill = "white", colour = NA),
          panel.grid.major = element_line(colour = 'transparent'),
          panel.grid.minor = element_line(colour = 'transparent'),
          legend.box.background = element_rect(fill = "white", colour = NA)))


## read in boston shapefiles ##
bos_shp<-st_read('../boston_ct/Census_2010_Tracts.shp')
bos_shp<-merge(bos_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

bos_bbox<-as.numeric(st_bbox(bos_shp))

xlim<-c(bos_bbox[1], bos_bbox[1]+.4*(bos_bbox[3]-bos_bbox[1]))
ylim<-c(bos_bbox[2], bos_bbox[2]+.8*(bos_bbox[4]-bos_bbox[2]))

## remove CTs with zero population (these are the islands off the MA coast) ##
#bos_shp<-bos_shp[which(bos_shp$exp_counts_ce_w!="NA" | bos_shp$exp_counts_ce_b!="NA"),]

## long form by race ##
shp_plot2208<-gather(bos_shp,'race','fit2208_mape',
                 c(mape_ij_dp2208_w,mape_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('mape_ij_dp2208_w','mape_ij_dp2208_b'),
                         to=c('NHW','Black'))

shp_plot2208$exp_factor<-cut(shp_plot2208$fit2208_mape,breaks=ma_q,
   labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)','[1.234,1.490)','1.490+'),right=F,levels=levels)

## race-stratified expected counts, BOS ##  
bos_maps2208<-ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  #scale_fill_brewer(palette = "YlOrRd", na.value = "white")+
  scale_fill_manual(values=brewer.pal(n = 7, name = "YlOrRd")[-1], na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

m2208<-cowplot::plot_grid(ma_maps2208+ theme(legend.position="none"),
                          bos_maps2208+ theme(legend.position="none"),
                          legend,ncol=1, rel_heights = c(2,2,.1))

load("../GA/sim/bw_adat5.RData")
adat_ga <- adat
adat_ga <- cbind(adat_ga, ga_mape_ij_dp2208)[,c(1, 2, 8)] #2877*4
# 
# ## put in wide format to merge with the shapefile ##
adat_wide_ga<-merge(adat_ga[which(adat_ga$race=='white'),],adat_ga[which(adat_ga$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
## extract CT shapefile ##
ga_shp<-tracts(state = 'GA',year=2010)

## merge with pop and rate data ##
ga_shp<-merge(ga_shp,adat_wide_ga,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ga_shp<-st_as_sf(ga_shp)  # 1969 *19


shp_plot2208<-gather(ga_shp,'race','fit2208_mape',
                 c(ga_mape_ij_dp2208_w,ga_mape_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('ga_mape_ij_dp2208_w','ga_mape_ij_dp2208_b'),
                         to=c('NHW','Black'))

ga_q = ma_q
shp_plot2208$exp_factor<-cut(shp_plot2208$fit2208_mape,breaks=ga_q,
                             labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F)

## race-stratified expected counts, MA ##
ga_maps2208<-ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "YlOrRd", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)

## read in boston shapefiles ##
fulton_shp<-tracts(state = 'GA', county =c("Fulton","Dekalb") , year=2010)

## merge with pop and rate data ##
fulton_shp<-merge(fulton_shp,adat_wide_ga,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
fulton_shp<-st_as_sf(fulton_shp)  #1478*21


## manually extract the NAME10 in Atlanta City ##
dekalb_name <- read.table("./shp_files/atl_ct/DC10CT_C13089_CT2MS.txt", header = TRUE,sep = ";")
#dekalb_name <- dekalb_name[c(1:11, 111:112,203),] 2020 DC map
dekalb_name <- dekalb_name[c(1:10),]

fulton_name <- read.table("./shp_files/atl_ct/DC10CT_C13121_CT2MS.txt", header = TRUE,sep = ";")
#fulton_name <- fulton_name[c(1:157,321:325),]
fulton_name <- fulton_name[c(1:114,135, 202,204),]

atl_name <- rbind(dekalb_name, fulton_name)

## remove CTs with zero population (these are the islands off the MA coast) ##
atl_shp<-fulton_shp[fulton_shp$GEOID10 %in% atl_name$CODE,]

atl_bbox<-as.numeric(st_bbox(atl_shp))

xlim<-c(atl_bbox[1], atl_bbox[3])
ylim<-c(atl_bbox[2], atl_bbox[4])

## long form by race ##
shp_plot2208 <- gather(atl_shp,'race','ga_fit2208_mape',
                 c(ga_mape_ij_dp2208_w,ga_mape_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('ga_mape_ij_dp2208_w','ga_mape_ij_dp2208_b'),
                         to=c('NHW','Black'))

shp_plot2208$exp_factor<-cut(shp_plot2208$ga_fit2208_mape,breaks=ma_q,
   labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F,levels=levels)

## race-stratified expected counts, BOS ##  
atl_maps2208<-ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  scale_fill_manual(values=brewer.pal(n = 7, name = "YlOrRd")[-1], na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

m2208<-cowplot::plot_grid(ma_maps2208+ theme(legend.position="none"),
                          bos_maps2208+ theme(legend.position="none"),
                          ga_maps2208+ theme(legend.position="none"),
                          atl_maps2208+ theme(legend.position="none"),
                          legend,ncol=1, rel_heights = c(3,2.5,3,2.5,.22))

```

### maps of bias for 2022-08-25 version

```{r}
coast <- c("9900", "9900.03", "9901", "9901.01")

load('../bw_adat5.RData')
adat <- cbind(adat, bias_ij_dp2208)[,c(1, 2, 9)] #2877*4


## put in wide format to merge with the shapefile ##
adat_wide<-merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))
#  1418*7

## extract CT shapefile ##
ma_shp<-tracts(state = 'MA',year=2010)

## merge with pop and rate data ##
ma_shp<-merge(ma_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ma_shp<-st_as_sf(ma_shp)  #1478*21

## remove CTs with zero population (these are the islands off the MA coast) ##
ma_shp<-ma_shp[!ma_shp$NAME10 %in% coast,]

shp_plot2208<-gather(ma_shp,'race','fit2208_bias',
                 c(bias_ij_dp2208_w,bias_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('bias_ij_dp2208_w','bias_ij_dp2208_b'),
                         to=c('NHW','Black'))

ma_q<-c(min(shp_plot2208$fit2208_bias,na.rm = T)-1,-0.104,-0.062, -0.020, 0.021,0.063,0.105,max(shp_plot2208$fit2208_bias,na.rm = T)+1)


shp_plot2208$exp_factor<-cut(shp_plot2208$fit2208_bias,breaks=ma_q,
                              labels=c('<-0.104','[-0.104, -0.062)','[-0.062,-0.020)','[-0.020,0.021)','[0.021,0.063)','[0.063,0.105)','0.105+'),right=F)

## race-stratified expected counts, MA ##
ma_maps2208<-ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "BrBG", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_text(size=18, face="bold"),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)
ma_maps2208


legend <- cowplot::get_legend(
  # create some space to the left of the legend
  ma_maps2208 + guides(fill = guide_legend(nrow = 1)) + 
    theme(legend.direction = "horizontal",
          legend.key.size = unit(.5, 'cm'),
          legend.text = element_text(size=10),
          legend.background = element_rect(fill = "white", colour = NA),
          legend.position = "bottom", 
          legend.key = element_rect(fill = "white", colour = NA),
          panel.grid.major = element_line(colour = 'transparent'),
          panel.grid.minor = element_line(colour = 'transparent'),
          legend.box.background = element_rect(fill = "white", colour = NA)))

bos_shp<-st_read('../boston_ct/Census_2010_Tracts.shp')
bos_shp<-merge(bos_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

bos_bbox<-as.numeric(st_bbox(bos_shp))

xlim<-c(bos_bbox[1], bos_bbox[1]+.4*(bos_bbox[3]-bos_bbox[1]))
ylim<-c(bos_bbox[2], bos_bbox[2]+.8*(bos_bbox[4]-bos_bbox[2]))

# remove CTs with zero population (these are the islands off the MA coast) ##
bos_shp<-bos_shp[!bos_shp$NAME10 %in% coast,]

## long form by race ##
shp_plot2208 <- gather(bos_shp,'race','fit2208_bias',
                 c(bias_ij_dp2208_w,bias_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('bias_ij_dp2208_w','bias_ij_dp2208_b'),
                         to=c('NHW','Black'))

shp_plot2208$exp_factor<-cut(shp_plot2208$fit2208_bias,breaks=ma_q,
                             labels=c('<-0.104','[-0.104, -0.062)','[-0.062,-0.020)','[-0.020,0.021)','[0.021,0.063)','[0.063,0.105)','0.105+'),right=F)

## race-stratified expected counts, BOS ##  
bos_maps2208 <- ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  scale_fill_manual(values=brewer.pal(n = 7, name = "BrBG"), na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

coast <- c("9900")
load("../GA/sim/bw_adat5.RData")

adat <- cbind(adat, ga_bias_ij_dp2208)[,c(1, 2, 8)] #2877*4

## put in wide format to merge with the shapefile ##
adat_wide <- merge(adat[which(adat$race=='white'),],adat[which(adat$race=='black'),],by='GEOID',suffixes=c('_w','_b'))

ga_shp<-tracts(state = 'GA', year=2010)

## merge with pop and rate data ##
ga_shp<-merge(ga_shp,adat_wide,by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
ga_shp<-st_as_sf(ga_shp)  # 1969 *19

## remove CTs with zero population (these are the islands off the MA coast) ##
ga_shp <- ga_shp[!ga_shp$NAME10 %in% coast,]

shp_plot2208 <- gather(ga_shp,'race','ga_fit2208_bias',
                 c(ga_bias_ij_dp2208_w,ga_bias_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('ga_bias_ij_dp2208_w','ga_bias_ij_dp2208_b'),
                         to=c('NHW','Black'))
ga_q = ma_q
shp_plot2208$exp_factor<-cut(shp_plot2208$ga_fit2208_bias,breaks=ga_q,
                              labels=c('<-0.104','[-0.104, -0.062)','[-0.062,-0.020)','[-0.020,0.021)','[0.021,0.063)','[0.063,0.105)','0.105+'),right=F)

## race-stratified expected counts, MA ##
ga_maps2208 <- ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  scale_fill_brewer(palette = "BrBG", na.value = "white", na.translate = F)+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA))+
  facet_wrap(~ race)

## read in atlanta shapefiles ##
fulton_shp<-tracts(state = 'GA', county =c("Fulton","Dekalb"), year=2010)

## merge with pop and rate data ##
fulton_shp<-merge(fulton_shp, adat_wide, by.x='GEOID10',by.y='GEOID',all.x=T)

## put in sf format to use nice mapping packages ##
fulton_shp<-st_as_sf(fulton_shp)  #1478*21

## manually extract the NAME10 in Atlanta City ##
dekalb_name <- read.table("./shp_files/atl_ct/DC10CT_C13089_CT2MS.txt", header = TRUE,sep = ";")
#dekalb_name <- dekalb_name[c(1:11, 111:112,203),] 2020 DC map
dekalb_name <- dekalb_name[c(1:10),]

fulton_name <- read.table("./shp_files/atl_ct/DC10CT_C13121_CT2MS.txt", header = TRUE,sep = ";")
#fulton_name <- fulton_name[c(1:157,321:325),]
fulton_name <- fulton_name[c(1:114,135, 202,204),]

atl_name <- rbind(dekalb_name, fulton_name)

## remove CTs with zero population (these are the islands off the MA coast) ##
atl_shp<-fulton_shp[fulton_shp$GEOID10 %in% atl_name$CODE,]

atl_bbox<-as.numeric(st_bbox(atl_shp))

xlim<-c(atl_bbox[1], atl_bbox[3])
ylim<-c(atl_bbox[2], atl_bbox[4])

## long form by race ##
shp_plot2208<-gather(atl_shp,'race','ga_fit2208_bias',
                 c(ga_bias_ij_dp2208_w,ga_bias_ij_dp2208_b),factor_key = T)

shp_plot2208$race<-mapvalues(shp_plot2208$race,
                         from=c('ga_bias_ij_dp2208_w','ga_bias_ij_dp2208_b'),
                         to=c('NHW','Black'))

shp_plot2208$exp_factor<-cut(shp_plot2208$ga_fit2208_bias,breaks=ga_q,
   labels=c('<0.210','[0.210,0.466)','[0.466,0.722)','[0.722,0.978)','[0.978,1.234)',
                                      '[1.234,1.490)','1.490+'),right=F,levels=levels)

## race-stratified expected counts, BOS ##  
atl_maps2208 <- ggplot(shp_plot2208, aes(fill = exp_factor)) +
  geom_sf(colour=NA) +
  coord_sf(xlim=xlim,ylim=ylim)+
  #scale_fill_brewer(palette = "YlOrRd", na.value = "white")+
  scale_fill_manual(values=brewer.pal(n = 7, name = "BrBG"), na.value = "white")+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.x=element_blank(),
        axis.ticks.y=element_blank(),
        axis.title.y=element_blank(),
        legend.title=element_blank(),
        text=element_text(size=18),
        strip.text.x = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        panel.background = element_rect(fill = "white", colour = NA),
        legend.direction = "horizontal",
        legend.background = element_rect(fill = "white", colour = NA),
        legend.position = "bottom", 
        legend.key = element_rect(fill = "white", colour = NA),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_line(colour = 'transparent'),
        legend.box.background = element_rect(fill = "white", colour = NA)) +
  facet_wrap(~ race)

b2208 <- cowplot::plot_grid(ma_maps2208 + theme(legend.position="none"),
                          bos_maps2208 + theme(legend.position="none"),
                          ga_maps2208 + theme(legend.position="none"),
                          atl_maps2208 + theme(legend.position="none"),
                          legend,ncol=1,rel_heights = c(3,2.5,3,2.5,.22))

```

















